import pandas as pd
—
title: “Summer Program: Python Programming _ Day2 Quiz” author: “김보람” date: “07/25/2023”
lesson 7
1
from IPython.core.display import HTML
'<table border="1" class="dataframe">\n <thead>\n <tr style="text-align: right;">\n <th></th>\n <th>A</th>\n <th>B</th>\n </tr>\n </thead>\n <tbody>\n <tr>\n <th>0</th>\n <td>1</td>\n <td>-2</td>\n </tr>\n <tr>\n <th>1</th>\n <td>2</td>\n <td>-3</td>\n </tr>\n <tr>\n <th>2</th>\n <td>3</td>\n <td>-4</td>\n </tr>\n </tbody>\n</table>') HTML(
A | B | |
---|---|---|
0 | 1 | -2 |
1 | 2 | -3 |
2 | 3 | -4 |
= pd.DataFrame({'A':[1,2,3],'B':[-2,-3,-4]})
df df
A | B | |
---|---|---|
0 | 1 | -2 |
1 | 2 | -3 |
2 | 3 | -4 |
2
= ['X1','X2']
df.columns df
X1 | X2 | |
---|---|---|
0 | 1 | -2 |
1 | 2 | -3 |
2 | 3 | -4 |
3
= pd.DataFrame(np.random.normal(size=(100,5)),columns=list('ABCDE'))
df df
A | B | C | D | E | |
---|---|---|---|---|---|
0 | 1.463910 | -0.189044 | -1.154230 | -1.063342 | 1.027457 |
1 | 0.821139 | -0.511394 | -3.710588 | 1.160104 | -0.107635 |
2 | -0.732671 | -1.033050 | -0.295740 | 0.282198 | -0.215732 |
3 | 1.598415 | -2.190977 | -0.803828 | -0.956396 | -0.174917 |
4 | -0.185002 | 1.154373 | -0.236004 | -2.128459 | -1.623930 |
... | ... | ... | ... | ... | ... |
95 | -1.085273 | 0.428166 | 0.744181 | 0.533127 | -0.263671 |
96 | -0.528071 | 1.460818 | -0.773037 | -0.355642 | 1.661265 |
97 | -0.044476 | -0.269066 | 0.018526 | -0.318183 | 0.990264 |
98 | -0.324435 | 0.583649 | 1.500259 | 0.257866 | -2.036244 |
99 | 2.104528 | 0.380031 | -0.353625 | -1.402250 | -1.109209 |
100 rows × 5 columns
'B','D']] df[[
B | D | |
---|---|---|
0 | -0.189044 | -1.063342 |
1 | -0.511394 | 1.160104 |
2 | -1.033050 | 0.282198 |
3 | -2.190977 | -0.956396 |
4 | 1.154373 | -2.128459 |
... | ... | ... |
95 | 0.428166 | 0.533127 |
96 | 1.460818 | -0.355642 |
97 | -0.269066 | -0.318183 |
98 | 0.583649 | 0.257866 |
99 | 0.380031 | -1.402250 |
100 rows × 2 columns
4
-10:] df[
A | B | C | D | E | |
---|---|---|---|---|---|
90 | -0.167761 | -0.664358 | 1.867122 | -0.177171 | 0.197525 |
91 | 0.377890 | -1.048871 | 2.145238 | 0.424379 | 0.525715 |
92 | 0.696097 | -0.948450 | 0.611477 | 1.591229 | 0.091939 |
93 | -0.419354 | 1.160804 | -0.993989 | 0.987491 | -1.580077 |
94 | 1.178698 | -0.141507 | -0.717452 | 1.447968 | 0.585297 |
95 | -1.085273 | 0.428166 | 0.744181 | 0.533127 | -0.263671 |
96 | -0.528071 | 1.460818 | -0.773037 | -0.355642 | 1.661265 |
97 | -0.044476 | -0.269066 | 0.018526 | -0.318183 | 0.990264 |
98 | -0.324435 | 0.583649 | 1.500259 | 0.257866 | -2.036244 |
99 | 2.104528 | 0.380031 | -0.353625 | -1.402250 | -1.109209 |
5
'A','B']][:10] df[[
A | B | |
---|---|---|
0 | 1.463910 | -0.189044 |
1 | 0.821139 | -0.511394 |
2 | -0.732671 | -1.033050 |
3 | 1.598415 | -2.190977 |
4 | -0.185002 | 1.154373 |
5 | 1.410994 | -1.352321 |
6 | 0.547868 | 0.647899 |
7 | -2.993996 | -0.656354 |
8 | 0.016301 | 0.654041 |
9 | -0.973628 | 0.756796 |
'A','B']].iloc[:10] df.loc[:,[
A | B | |
---|---|---|
0 | 1.463910 | -0.189044 |
1 | 0.821139 | -0.511394 |
2 | -0.732671 | -1.033050 |
3 | 1.598415 | -2.190977 |
4 | -0.185002 | 1.154373 |
5 | 1.410994 | -1.352321 |
6 | 0.547868 | 0.647899 |
7 | -2.993996 | -0.656354 |
8 | 0.016301 | 0.654041 |
9 | -0.973628 | 0.756796 |
6
=pd.read_csv('https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/movie.csv')
df df
color | director_name | num_critic_for_reviews | duration | director_facebook_likes | actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | gross | genres | ... | num_user_for_reviews | language | country | content_rating | budget | title_year | actor_2_facebook_likes | imdb_score | aspect_ratio | movie_facebook_likes | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Color | James Cameron | 723.0 | 178.0 | 0.0 | 855.0 | Joel David Moore | 1000.0 | 760505847.0 | Action|Adventure|Fantasy|Sci-Fi | ... | 3054.0 | English | USA | PG-13 | 237000000.0 | 2009.0 | 936.0 | 7.9 | 1.78 | 33000 |
1 | Color | Gore Verbinski | 302.0 | 169.0 | 563.0 | 1000.0 | Orlando Bloom | 40000.0 | 309404152.0 | Action|Adventure|Fantasy | ... | 1238.0 | English | USA | PG-13 | 300000000.0 | 2007.0 | 5000.0 | 7.1 | 2.35 | 0 |
2 | Color | Sam Mendes | 602.0 | 148.0 | 0.0 | 161.0 | Rory Kinnear | 11000.0 | 200074175.0 | Action|Adventure|Thriller | ... | 994.0 | English | UK | PG-13 | 245000000.0 | 2015.0 | 393.0 | 6.8 | 2.35 | 85000 |
3 | Color | Christopher Nolan | 813.0 | 164.0 | 22000.0 | 23000.0 | Christian Bale | 27000.0 | 448130642.0 | Action|Thriller | ... | 2701.0 | English | USA | PG-13 | 250000000.0 | 2012.0 | 23000.0 | 8.5 | 2.35 | 164000 |
4 | NaN | Doug Walker | NaN | NaN | 131.0 | NaN | Rob Walker | 131.0 | NaN | Documentary | ... | NaN | NaN | NaN | NaN | NaN | NaN | 12.0 | 7.1 | NaN | 0 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
4911 | Color | Scott Smith | 1.0 | 87.0 | 2.0 | 318.0 | Daphne Zuniga | 637.0 | NaN | Comedy|Drama | ... | 6.0 | English | Canada | NaN | NaN | 2013.0 | 470.0 | 7.7 | NaN | 84 |
4912 | Color | NaN | 43.0 | 43.0 | NaN | 319.0 | Valorie Curry | 841.0 | NaN | Crime|Drama|Mystery|Thriller | ... | 359.0 | English | USA | TV-14 | NaN | NaN | 593.0 | 7.5 | 16.00 | 32000 |
4913 | Color | Benjamin Roberds | 13.0 | 76.0 | 0.0 | 0.0 | Maxwell Moody | 0.0 | NaN | Drama|Horror|Thriller | ... | 3.0 | English | USA | NaN | 1400.0 | 2013.0 | 0.0 | 6.3 | NaN | 16 |
4914 | Color | Daniel Hsia | 14.0 | 100.0 | 0.0 | 489.0 | Daniel Henney | 946.0 | 10443.0 | Comedy|Drama|Romance | ... | 9.0 | English | USA | PG-13 | NaN | 2012.0 | 719.0 | 6.3 | 2.35 | 660 |
4915 | Color | Jon Gunn | 43.0 | 90.0 | 16.0 | 16.0 | Brian Herzlinger | 86.0 | 85222.0 | Documentary | ... | 84.0 | English | USA | PG | 1100.0 | 2004.0 | 23.0 | 6.6 | 1.85 | 456 |
4916 rows × 28 columns
len(df.columns)
28
7
= df.columns
index index
Index(['color', 'director_name', 'num_critic_for_reviews', 'duration',
'director_facebook_likes', 'actor_3_facebook_likes', 'actor_2_name',
'actor_1_facebook_likes', 'gross', 'genres', 'actor_1_name',
'movie_title', 'num_voted_users', 'cast_total_facebook_likes',
'actor_3_name', 'facenumber_in_poster', 'plot_keywords',
'movie_imdb_link', 'num_user_for_reviews', 'language', 'country',
'content_rating', 'budget', 'title_year', 'actor_2_facebook_likes',
'imdb_score', 'aspect_ratio', 'movie_facebook_likes'],
dtype='object')
for l in index if l[0]=='c' or l[0]=='d'] [l
['color',
'director_name',
'duration',
'director_facebook_likes',
'cast_total_facebook_likes',
'country',
'content_rating']
len([l for l in index if l[0]=='c' or l[0]=='d'])
7
8
for l in df.columns if 'actor' in l] [l
['actor_3_facebook_likes',
'actor_2_name',
'actor_1_facebook_likes',
'actor_1_name',
'actor_3_name',
'actor_2_facebook_likes']
len([l for l in df.columns if 'actor' in l])
6
9
'actor' in l for l in df.columns]] df.loc[:, [
actor_3_facebook_likes | actor_2_name | actor_1_facebook_likes | actor_1_name | actor_3_name | actor_2_facebook_likes | |
---|---|---|---|---|---|---|
0 | 855.0 | Joel David Moore | 1000.0 | CCH Pounder | Wes Studi | 936.0 |
1 | 1000.0 | Orlando Bloom | 40000.0 | Johnny Depp | Jack Davenport | 5000.0 |
2 | 161.0 | Rory Kinnear | 11000.0 | Christoph Waltz | Stephanie Sigman | 393.0 |
3 | 23000.0 | Christian Bale | 27000.0 | Tom Hardy | Joseph Gordon-Levitt | 23000.0 |
4 | NaN | Rob Walker | 131.0 | Doug Walker | NaN | 12.0 |
... | ... | ... | ... | ... | ... | ... |
4911 | 318.0 | Daphne Zuniga | 637.0 | Eric Mabius | Crystal Lowe | 470.0 |
4912 | 319.0 | Valorie Curry | 841.0 | Natalie Zea | Sam Underwood | 593.0 |
4913 | 0.0 | Maxwell Moody | 0.0 | Eva Boehnke | David Chandler | 0.0 |
4914 | 489.0 | Daniel Henney | 946.0 | Alan Ruck | Eliza Coupe | 719.0 |
4915 | 16.0 | Brian Herzlinger | 86.0 | John August | Jon Gunn | 23.0 |
4916 rows × 6 columns
lesson 9
20230426)
np.random.seed(= ['2023-04-24(Mon)','2023-04-25(Tue)','2023-04-26(Wed)','2023-04-27(Thu)','2023-04-28(Fri)',
day '2023-05-01(Mon)','2023-05-02(Tue)','2023-05-03(Wed)','2023-05-04(Thu)','2023-05-05(Fri)',
'2023-05-08(Mon)','2023-05-09(Tue)','2023-05-10(Wed)','2023-05-11(Thu)','2023-05-12(Fri)']
= np.random.randn(15).cumsum()*2
hours1 = hours1 - hours1.min() +1
hours1 = np.random.randn(15).cumsum()*2
hours2 = hours2 - hours2.min() +1
hours2
= pd.DataFrame({'hours(R)':hours1, 'hours(Python)':hours2},index=day)
df df
hours(R) | hours(Python) | |
---|---|---|
2023-04-24(Mon) | 11.064829 | 9.254671 |
2023-04-25(Tue) | 9.790750 | 7.327548 |
2023-04-26(Wed) | 5.993362 | 9.185495 |
2023-04-27(Thu) | 7.542498 | 12.525569 |
2023-04-28(Fri) | 8.598600 | 10.906909 |
2023-05-01(Mon) | 6.933549 | 9.865538 |
2023-05-02(Tue) | 6.456987 | 11.081043 |
2023-05-03(Wed) | 4.976548 | 10.240239 |
2023-05-04(Thu) | 6.021139 | 5.822405 |
2023-05-05(Fri) | 1.851839 | 5.522484 |
2023-05-08(Mon) | 1.000000 | 4.319094 |
2023-05-09(Tue) | 1.350073 | 1.000000 |
2023-05-10(Wed) | 3.138700 | 2.633662 |
2023-05-11(Thu) | 3.153756 | 4.870860 |
2023-05-12(Fri) | 1.353976 | 1.785441 |
1
= [l.replace(')','').split('(') for l in df.index] index
index
[['2023-04-24', 'Mon'],
['2023-04-25', 'Tue'],
['2023-04-26', 'Wed'],
['2023-04-27', 'Thu'],
['2023-04-28', 'Fri'],
['2023-05-01', 'Mon'],
['2023-05-02', 'Tue'],
['2023-05-03', 'Wed'],
['2023-05-04', 'Thu'],
['2023-05-05', 'Fri'],
['2023-05-08', 'Mon'],
['2023-05-09', 'Tue'],
['2023-05-10', 'Wed'],
['2023-05-11', 'Thu'],
['2023-05-12', 'Fri']]
= [day for day, _ in index], weekday = [weekday for _, weekday in index]) df.assign(day
hours(R) | hours(Python) | day | weekday | |
---|---|---|---|---|
2023-04-24(Mon) | 11.064829 | 9.254671 | 2023-04-24 | Mon |
2023-04-25(Tue) | 9.790750 | 7.327548 | 2023-04-25 | Tue |
2023-04-26(Wed) | 5.993362 | 9.185495 | 2023-04-26 | Wed |
2023-04-27(Thu) | 7.542498 | 12.525569 | 2023-04-27 | Thu |
2023-04-28(Fri) | 8.598600 | 10.906909 | 2023-04-28 | Fri |
2023-05-01(Mon) | 6.933549 | 9.865538 | 2023-05-01 | Mon |
2023-05-02(Tue) | 6.456987 | 11.081043 | 2023-05-02 | Tue |
2023-05-03(Wed) | 4.976548 | 10.240239 | 2023-05-03 | Wed |
2023-05-04(Thu) | 6.021139 | 5.822405 | 2023-05-04 | Thu |
2023-05-05(Fri) | 1.851839 | 5.522484 | 2023-05-05 | Fri |
2023-05-08(Mon) | 1.000000 | 4.319094 | 2023-05-08 | Mon |
2023-05-09(Tue) | 1.350073 | 1.000000 | 2023-05-09 | Tue |
2023-05-10(Wed) | 3.138700 | 2.633662 | 2023-05-10 | Wed |
2023-05-11(Thu) | 3.153756 | 4.870860 | 2023-05-11 | Thu |
2023-05-12(Fri) | 1.353976 | 1.785441 | 2023-05-12 | Fri |
= df.assign(day = [day for day, _ in index], weekday = [weekday for _, weekday in index]).reset_index().loc[:,'hours(R)':]
df df
hours(R) | hours(Python) | day | weekday | |
---|---|---|---|---|
0 | 11.064829 | 9.254671 | 2023-04-24 | Mon |
1 | 9.790750 | 7.327548 | 2023-04-25 | Tue |
2 | 5.993362 | 9.185495 | 2023-04-26 | Wed |
3 | 7.542498 | 12.525569 | 2023-04-27 | Thu |
4 | 8.598600 | 10.906909 | 2023-04-28 | Fri |
5 | 6.933549 | 9.865538 | 2023-05-01 | Mon |
6 | 6.456987 | 11.081043 | 2023-05-02 | Tue |
7 | 4.976548 | 10.240239 | 2023-05-03 | Wed |
8 | 6.021139 | 5.822405 | 2023-05-04 | Thu |
9 | 1.851839 | 5.522484 | 2023-05-05 | Fri |
10 | 1.000000 | 4.319094 | 2023-05-08 | Mon |
11 | 1.350073 | 1.000000 | 2023-05-09 | Tue |
12 | 3.138700 | 2.633662 | 2023-05-10 | Wed |
13 | 3.153756 | 4.870860 | 2023-05-11 | Thu |
14 | 1.353976 | 1.785441 | 2023-05-12 | Fri |
2
'day < "2023-05-01"')['hours(R)'].sum() df.query(
42.99003889835529
'day < "2023-05-01"')['hours(Python)'].sum() df.query(
49.20019054928582
- python
3
== s, 'hours(R)'].sum() for s in set(df.weekday)} {s:df.loc[df.weekday
{'Thu': 16.717393020928853,
'Mon': 18.99837797631909,
'Wed': 14.10860912003022,
'Fri': 11.804415159359687,
'Tue': 17.597810683605076}
4
== s, 'hours(R)'].sum() for s in set(df.weekday)} {s:df.loc[df.weekday
{'Thu': 16.717393020928853,
'Mon': 18.99837797631909,
'Wed': 14.10860912003022,
'Fri': 11.804415159359687,
'Tue': 17.597810683605076}
== s, 'hours(Python)'].sum() for s in set(df.weekday)} {s:df.loc[df.weekday
{'Thu': 23.21883427607518,
'Mon': 23.43930191418699,
'Wed': 22.05939511920843,
'Fri': 18.214832977059253,
'Tue': 19.408591083196185}
== s,'hours(R)',].sum()+df.loc[df.weekday == s,'hours(Python)'].sum() for s in set(df.weekday)} {s:df.loc[df.weekday
{'Thu': 39.93622729700404,
'Mon': 42.43767989050608,
'Wed': 36.16800423923865,
'Fri': 30.019248136418938,
'Tue': 37.00640176680126}
5
import matplotlib.pyplot as plt
= df['hours(R)'] - df['hours(Python)']
y '--o') plt.plot(y,
lesson 10
= pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/FIFA23_official_data.csv')
df =3) df.head(n
ID | Name | Age | Photo | Nationality | Flag | Overall | Potential | Club | Club Logo | ... | Real Face | Position | Joined | Loaned From | Contract Valid Until | Height | Weight | Release Clause | Kit Number | Best Overall Rating | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 209658 | L. Goretzka | 27 | https://cdn.sofifa.net/players/209/658/23_60.png | Germany | https://cdn.sofifa.net/flags/de.png | 87 | 88 | FC Bayern München | https://cdn.sofifa.net/teams/21/30.png | ... | Yes | <span class="pos pos28">SUB | Jul 1, 2018 | NaN | 2026 | 189cm | 82kg | €157M | 8.0 | NaN |
1 | 212198 | Bruno Fernandes | 27 | https://cdn.sofifa.net/players/212/198/23_60.png | Portugal | https://cdn.sofifa.net/flags/pt.png | 86 | 87 | Manchester United | https://cdn.sofifa.net/teams/11/30.png | ... | Yes | <span class="pos pos15">LCM | Jan 30, 2020 | NaN | 2026 | 179cm | 69kg | €155M | 8.0 | NaN |
2 | 224334 | M. Acuña | 30 | https://cdn.sofifa.net/players/224/334/23_60.png | Argentina | https://cdn.sofifa.net/flags/ar.png | 85 | 85 | Sevilla FC | https://cdn.sofifa.net/teams/481/30.png | ... | No | <span class="pos pos7">LB | Sep 14, 2020 | NaN | 2024 | 172cm | 69kg | €97.7M | 19.0 | NaN |
3 rows × 29 columns
1
df.Position
0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
'Position'] df.loc[:,
0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
= df["Position"]
s s
0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17660, dtype: object
2
= s.dropna()
s s
0 <span class="pos pos28">SUB
1 <span class="pos pos15">LCM
2 <span class="pos pos7">LB
3 <span class="pos pos13">RCM
4 <span class="pos pos13">RCM
...
17655 <span class="pos pos29">RES
17656 <span class="pos pos29">RES
17657 <span class="pos pos29">RES
17658 <span class="pos pos29">RES
17659 <span class="pos pos29">RES
Name: Position, Length: 17625, dtype: object
'<span class="pos pos28">SUB'
3
= {
position_dict 'GOALKEEPER':['GK'],
'DEFENDER':['CB','RCB','LCB','RB','LB','RWB','LWB'],
'MIDFIELDER':['CM','RCM','LCM','CDM','RDM','LDM','CAM','RAM','LAM','RM','LM'],
'FORWARD':['ST','CF','RF','LF','RW','LW','RS','LS'],
'SUB':['SUB'],
'RES':['RES']
}
= list(map(lambda x: x.split('>')[-1],s)) lst
apply(lambda x: x.split('>')[-1]) s.
0 SUB
1 LCM
2 LB
3 RCM
4 RCM
...
17655 RES
17656 RES
17657 RES
17658 RES
17659 RES
Name: Position, Length: 17625, dtype: object
for k,v in position_dict.items()] [(k,v)
[('GOALKEEPER', ['GK']),
('DEFENDER', ['CB', 'RCB', 'LCB', 'RB', 'LB', 'RWB', 'LWB']),
('MIDFIELDER',
['CM', 'RCM', 'LCM', 'CDM', 'RDM', 'LDM', 'CAM', 'RAM', 'LAM', 'RM', 'LM']),
('FORWARD', ['ST', 'CF', 'RF', 'LF', 'RW', 'LW', 'RS', 'LS']),
('SUB', ['SUB']),
('RES', ['RES'])]
for l in s.apply(lambda x: x.split('>')[-1]) for k,v in position_dict.items() if l in v] [k
['SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'RES',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'RES',
'SUB',
'DEFENDER',
'FORWARD',
'DEFENDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'RES',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'FORWARD',
'FORWARD',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'FORWARD',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'RES',
'RES',
'FORWARD',
'SUB',
'FORWARD',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'RES',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'RES',
'SUB',
'SUB',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'RES',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'RES',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'FORWARD',
'DEFENDER',
'SUB',
'SUB',
'FORWARD',
'SUB',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'FORWARD',
'SUB',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'RES',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'SUB',
'DEFENDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'RES',
'FORWARD',
'RES',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'SUB',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'RES',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'SUB',
'DEFENDER',
'RES',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'RES',
'SUB',
'FORWARD',
'RES',
'RES',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'FORWARD',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'SUB',
'FORWARD',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'RES',
'FORWARD',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'DEFENDER',
'DEFENDER',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'RES',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'SUB',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'SUB',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'MIDFIELDER',
'DEFENDER',
'MIDFIELDER',
'MIDFIELDER',
'DEFENDER',
'SUB',
'SUB',
'SUB',
'MIDFIELDER',
'FORWARD',
'SUB',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'DEFENDER',
'SUB',
'RES',
'FORWARD',
'FORWARD',
'DEFENDER',
'RES',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'SUB',
'FORWARD',
'MIDFIELDER',
'FORWARD',
'RES',
'DEFENDER',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'SUB',
'DEFENDER',
'MIDFIELDER',
'RES',
'DEFENDER',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'DEFENDER',
'MIDFIELDER',
'SUB',
'MIDFIELDER',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
'RES',
'MIDFIELDER',
'RES',
'MIDFIELDER',
'SUB',
'DEFENDER',
'SUB',
'MIDFIELDER',
'FORWARD',
'RES',
'FORWARD',
'MIDFIELDER',
'MIDFIELDER',
'SUB',
...]
# list(map(lambda l : [k for l in s for k,v in position_dict.items() if l in v],s))
4
df.Age
0 27
1 27
2 30
3 31
4 25
..
17655 19
17656 17
17657 25
17658 18
17659 20
Name: Age, Length: 17660, dtype: int64
df.Age.mean()
23.127746319365798
list(map(lambda x: 'OB' if x>df.Age.mean() else 'YB', df.Age))
['OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'YB',
'YB',
'OB',
'YB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'YB',
'OB',
'OB',
'YB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
'OB',
...]
or
apply( df.Age.
5
BMI = 키 / 몸무게^2
apply( x[:-2]) df.Height.
df.Height
0 189cm
1 179cm
2 172cm
3 181cm
4 172cm
...
17655 190cm
17656 195cm
17657 190cm
17658 187cm
17659 186cm
Name: Height, Length: 17660, dtype: object
df.Weight
0 82kg
1 69kg
2 69kg
3 70kg
4 68kg
...
17655 78kg
17656 84kg
17657 82kg
17658 79kg
17659 78kg
Name: Weight, Length: 17660, dtype: object
= df.loc[:,'Height':'Weight'].applymap(lambda x: int(x[:-2])).eval('BMI=Height/Weight**2')
_df _df
Height | Weight | BMI | |
---|---|---|---|
0 | 189 | 82 | 0.028108 |
1 | 179 | 69 | 0.037597 |
2 | 172 | 69 | 0.036127 |
3 | 181 | 70 | 0.036939 |
4 | 172 | 68 | 0.037197 |
... | ... | ... | ... |
17655 | 190 | 78 | 0.031229 |
17656 | 195 | 84 | 0.027636 |
17657 | 190 | 82 | 0.028257 |
17658 | 187 | 79 | 0.029963 |
17659 | 186 | 78 | 0.030572 |
17660 rows × 3 columns
6
_df.BMI.mean()
0.03343718444596221
list(map(lambda x: 'H' if x>_df.BMI.mean() else 'L', _df.BMI))
['L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'L',
'H',
'L',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'H',
'L',
'H',
'H',
'L',
'L',
'L',
'H',
'H',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'L',
'L',
'L',
'H',
'L',
'L',
'H',
'L',
'L',
'H',
'H',
'H',
'L',
'L',
'L',
'L',
'L',
'L',
'L',
'H',
'H',
'L',
'L',
'H',
'L',
'H',
'L',
...]
apply(lambda x: 'H' if x>_df.BMI.mean() else 'L') _df.BMI.
0 L
1 H
2 H
3 H
4 H
..
17655 L
17656 L
17657 L
17658 L
17659 L
Name: BMI, Length: 17660, dtype: object
lesson 11
lesson
= ['A']*5+['B']*5
_category = np.concatenate([np.random.randn(5), np.random.randn(5)+10])
_value = pd.DataFrame({'category':_category, 'value':_value})
df df
category | value | |
---|---|---|
0 | A | 1.086803 |
1 | A | 0.448341 |
2 | A | 0.575944 |
3 | A | 0.277854 |
4 | A | -1.924633 |
5 | B | 9.844358 |
6 | B | 9.819660 |
7 | B | 9.425310 |
8 | B | 10.191885 |
9 | B | 11.187810 |
"category").mean() df.groupby(
value | |
---|---|
category | |
A | 0.092862 |
B | 10.093805 |
"category").aggregate(np.mean) df.groupby(
value | |
---|---|
category | |
A | 0.092862 |
B | 10.093805 |
flights data
=pd.read_csv('https://raw.githubusercontent.com/PacktPublishing/Pandas-Cookbook/master/data/flights.csv')
df df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 58492 entries, 0 to 58491
Data columns (total 14 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 MONTH 58492 non-null int64
1 DAY 58492 non-null int64
2 WEEKDAY 58492 non-null int64
3 AIRLINE 58492 non-null object
4 ORG_AIR 58492 non-null object
5 DEST_AIR 58492 non-null object
6 SCHED_DEP 58492 non-null int64
7 DEP_DELAY 57659 non-null float64
8 AIR_TIME 57474 non-null float64
9 DIST 58492 non-null int64
10 SCHED_ARR 58492 non-null int64
11 ARR_DELAY 57474 non-null float64
12 DIVERTED 58492 non-null int64
13 CANCELLED 58492 non-null int64
dtypes: float64(3), int64(8), object(3)
memory usage: 6.2+ MB
-
예제1: 항공사(AIRLINE)별로 도착지연시간의(ARR_DELAY)의 평균을 구하라.
"AIRLINE")["ARR_DELAY"].mean() df.groupby(
AIRLINE
AA 5.542661
AS -0.833333
B6 8.692593
DL 0.339691
EV 7.034580
F9 13.630651
HA 4.972973
MQ 6.860591
NK 18.436070
OO 7.593463
UA 7.765755
US 1.681105
VX 5.348884
WN 6.397353
Name: ARR_DELAY, dtype: float64
"AIRLINE").aggregate({'ARR_DELAY':[np.mean,'count']}) df.groupby(
ARR_DELAY | ||
---|---|---|
mean | count | |
AIRLINE | ||
AA | 5.542661 | 8720 |
AS | -0.833333 | 768 |
B6 | 8.692593 | 540 |
DL | 0.339691 | 10539 |
EV | 7.034580 | 5697 |
F9 | 13.630651 | 1305 |
HA | 4.972973 | 111 |
MQ | 6.860591 | 3314 |
NK | 18.436070 | 1486 |
OO | 7.593463 | 6425 |
UA | 7.765755 | 7680 |
US | 1.681105 | 1593 |
VX | 5.348884 | 986 |
WN | 6.397353 | 8310 |
-
예제2: 항공사(AIRLINE)별로 비행취소건수(CANCELLED)의 합계를 구하라. 취소건수가 가장 높은 두개의 항공사는 어디인가?
"AIRLINE").aggregate({'CANCELLED':'sum'}).sort_values(by="CANCELLED", ascending=False) df.groupby(
CANCELLED | |
---|---|
AIRLINE | |
AA | 154 |
MQ | 152 |
EV | 146 |
OO | 142 |
UA | 93 |
WN | 93 |
DL | 38 |
NK | 25 |
US | 21 |
F9 | 10 |
VX | 6 |
B6 | 1 |
AS | 0 |
HA | 0 |
-
예제3: 항공사(AIRLINE)별로 비행취소율(CANCELLED)을 구하라. 비행취소율이 가장 높은 두개의 항공사는 어디인가?
"AIRLINE").aggregate({'CANCELLED':'mean'}).sort_values(by="CANCELLED", ascending=False) df.groupby(
CANCELLED | |
---|---|
AIRLINE | |
MQ | 0.043791 |
EV | 0.024923 |
OO | 0.021554 |
AA | 0.017303 |
NK | 0.016491 |
US | 0.013003 |
UA | 0.011935 |
WN | 0.011048 |
F9 | 0.007593 |
VX | 0.006042 |
DL | 0.003585 |
B6 | 0.001842 |
AS | 0.000000 |
HA | 0.000000 |
-
예제5: 아래는 운행거리의 요약통계량이다. 운행거리를 구간별로 [-np.inf,391,690,1199,np.inf]
와 같이 나눈뒤 비행취소건수와 취소율을 구하여라.
df.DIST.describe()
count 58492.000000
mean 872.900072
std 624.996805
min 67.000000
25% 391.000000
50% 690.000000
75% 1199.000000
max 4502.000000
Name: DIST, dtype: float64
= [-np.inf,391,690,1199,np.inf] _bin
= pd.cut(df.DIST,_bin)).groupby('DIST2').agg('mean') df.assign(DIST2
MONTH | DAY | WEEKDAY | SCHED_DEP | DEP_DELAY | AIR_TIME | DIST | SCHED_ARR | ARR_DELAY | DIVERTED | CANCELLED | |
---|---|---|---|---|---|---|---|---|---|---|---|
DIST2 | |||||||||||
(-inf, 391.0] | 6.120488 | 15.719539 | 3.901425 | 1450.911398 | 9.414477 | 44.863971 | 259.463094 | 1552.313161 | 6.320828 | 0.000814 | 0.022659 |
(391.0, 690.0] | 6.215019 | 15.736686 | 3.929315 | 1422.562246 | 10.105660 | 79.233721 | 547.309611 | 1563.083018 | 5.673505 | 0.002549 | 0.013503 |
(690.0, 1199.0] | 6.244324 | 15.643155 | 3.934032 | 1355.024318 | 12.076761 | 121.376579 | 909.723096 | 1573.918111 | 6.603278 | 0.002553 | 0.013637 |
(1199.0, inf] | 6.304648 | 15.710334 | 3.943070 | 1322.569298 | 12.077681 | 219.595209 | 1794.080413 | 1507.135670 | 4.616760 | 0.003484 | 0.010313 |
Quiz
= pd.read_csv('https://raw.githubusercontent.com/guebin/DV2022/master/posts/HRDataset_v14.csv')
df df
Employee_Name | EmpID | MarriedID | MaritalStatusID | GenderID | EmpStatusID | DeptID | PerfScoreID | FromDiversityJobFairID | Salary | ... | ManagerName | ManagerID | RecruitmentSource | PerformanceScore | EngagementSurvey | EmpSatisfaction | SpecialProjectsCount | LastPerformanceReview_Date | DaysLateLast30 | Absences | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | Adinolfi, Wilson K | 10026 | 0 | 0 | 1 | 1 | 5 | 4 | 0 | 62506 | ... | Michael Albert | 22.0 | Exceeds | 4.60 | 5 | 0 | 1/17/2019 | 0 | 1 | |
1 | Ait Sidi, Karthikeyan | 10084 | 1 | 1 | 1 | 5 | 3 | 3 | 0 | 104437 | ... | Simon Roup | 4.0 | Indeed | Fully Meets | 4.96 | 3 | 6 | 2/24/2016 | 0 | 17 |
2 | Akinkuolie, Sarah | 10196 | 1 | 1 | 0 | 5 | 5 | 3 | 0 | 64955 | ... | Kissy Sullivan | 20.0 | Fully Meets | 3.02 | 3 | 0 | 5/15/2012 | 0 | 3 | |
3 | Alagbe,Trina | 10088 | 1 | 1 | 0 | 1 | 5 | 3 | 0 | 64991 | ... | Elijiah Gray | 16.0 | Indeed | Fully Meets | 4.84 | 5 | 0 | 1/3/2019 | 0 | 15 |
4 | Anderson, Carol | 10069 | 0 | 2 | 0 | 5 | 5 | 3 | 0 | 50825 | ... | Webster Butler | 39.0 | Google Search | Fully Meets | 5.00 | 4 | 0 | 2/1/2016 | 0 | 2 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
306 | Woodson, Jason | 10135 | 0 | 0 | 1 | 1 | 5 | 3 | 0 | 65893 | ... | Kissy Sullivan | 20.0 | Fully Meets | 4.07 | 4 | 0 | 2/28/2019 | 0 | 13 | |
307 | Ybarra, Catherine | 10301 | 0 | 0 | 0 | 5 | 5 | 1 | 0 | 48513 | ... | Brannon Miller | 12.0 | Google Search | PIP | 3.20 | 2 | 0 | 9/2/2015 | 5 | 4 |
308 | Zamora, Jennifer | 10010 | 0 | 0 | 0 | 1 | 3 | 4 | 0 | 220450 | ... | Janet King | 2.0 | Employee Referral | Exceeds | 4.60 | 5 | 6 | 2/21/2019 | 0 | 16 |
309 | Zhou, Julia | 10043 | 0 | 0 | 0 | 1 | 3 | 3 | 0 | 89292 | ... | Simon Roup | 4.0 | Employee Referral | Fully Meets | 5.00 | 3 | 5 | 2/1/2019 | 0 | 11 |
310 | Zima, Colleen | 10271 | 0 | 4 | 0 | 1 | 5 | 3 | 0 | 45046 | ... | David Stanley | 14.0 | Fully Meets | 4.50 | 5 | 0 | 1/30/2019 | 0 | 2 |
311 rows × 36 columns
1
='RaceDesc').agg({'EmpID':'count'}) df.groupby(by
EmpID | |
---|---|
RaceDesc | |
American Indian or Alaska Native | 3 |
Asian | 29 |
Black or African American | 80 |
Hispanic | 1 |
Two or more races | 11 |
White | 187 |
2
’RaceDesc==White’의 성별(Sex)임금차이는 2000이상이다.
=['RaceDesc','Sex']).agg({'Salary':'mean'}) df.groupby(by
Salary | ||
---|---|---|
RaceDesc | Sex | |
American Indian or Alaska Native | F | 63436.500000 |
M | 70545.000000 | |
Asian | F | 67520.117647 |
M | 69939.416667 | |
Black or African American | F | 66963.829787 |
M | 85066.121212 | |
Hispanic | M | 83667.000000 |
Two or more races | F | 58068.500000 |
M | 62313.800000 | |
White | F | 68846.519231 |
M | 65334.132530 |
3
퇴직한사람(Termd==1)은 모두 104명이며 백인여성의 퇴직자수가 가장 많다.
==1).sum() (df.Termd
104
'RaceDesc','Sex']).agg({'Termd':'sum'}) df.groupby([
Termd | ||
---|---|---|
RaceDesc | Sex | |
American Indian or Alaska Native | F | 0 |
M | 0 | |
Asian | F | 6 |
M | 3 | |
Black or African American | F | 15 |
M | 14 | |
Hispanic | M | 0 |
Two or more races | F | 2 |
M | 1 | |
White | F | 37 |
M | 26 |
4
퇴직한사람중 아시아인의 비율은 10%가 넘지 않는다.
=['RaceDesc']).agg({'Termd':'sum'}) df.groupby(by
Termd | |
---|---|
RaceDesc | |
American Indian or Alaska Native | 0 |
Asian | 9 |
Black or African American | 29 |
Hispanic | 0 |
Two or more races | 3 |
White | 63 |
9/104
0.08653846153846154
5
[성별(Sex), 결혼유무(MarriedID)] 별 연봉(Salary)의 평균을 조사하라. 어떠한 그룹이 가장 평균연봉이 적은가?
'Sex','MarriedID']).agg({'Salary':'mean'}) df.groupby([
Salary | ||
---|---|---|
Sex | MarriedID | |
F | 0 | 66504.394231 |
1 | 69638.986111 | |
M | 0 | 70967.939759 |
1 | 70089.038462 |
6
[성별(Sex), 인종별(RaceDesc)] 별 연봉(Salary)의 중앙값(‘median’)을 조사하라. 연봉의 중앙값이 가장 높은 그룹은 무엇인가?
'Sex','RaceDesc']).agg({'Salary':'median'}) df.groupby([
Salary | ||
---|---|---|
Sex | RaceDesc | |
F | American Indian or Alaska Native | 63436.5 |
Asian | 63676.0 | |
Black or African American | 61584.0 | |
Two or more races | 57837.0 | |
White | 62405.0 | |
M | American Indian or Alaska Native | 70545.0 |
Asian | 64731.0 | |
Black or African American | 71339.0 | |
Hispanic | 83667.0 | |
Two or more races | 61568.0 | |
White | 61809.0 |
7
[성별(Sex), 인종별(RaceDesc)] 별 연봉(Salary)의 중앙값(‘median’)과 근무인원수를 함께 조사하라. 연봉의 중앙값이 가장 높은 그룹이 혜택을 받는 그룹이라고 느껴지는가?
'Sex','RaceDesc']).agg({'Salary':['median','count']}) df.groupby([
Salary | |||
---|---|---|---|
median | count | ||
Sex | RaceDesc | ||
F | American Indian or Alaska Native | 63436.5 | 2 |
Asian | 63676.0 | 17 | |
Black or African American | 61584.0 | 47 | |
Two or more races | 57837.0 | 6 | |
White | 62405.0 | 104 | |
M | American Indian or Alaska Native | 70545.0 | 1 |
Asian | 64731.0 | 12 | |
Black or African American | 71339.0 | 33 | |
Hispanic | 83667.0 | 1 | |
Two or more races | 61568.0 | 5 | |
White | 61809.0 | 83 |